# Load the preprocessed crime table and drop the rows whose state is "AK".
df <- read.csv("./data/crimedata_geoloc_preprocessed.csv")
df <- df %>% filter(state != "AK")
# Rebuild the factor from its character values so its level set only
# contains the states that are still present after the filter.
df$state <- as.factor(as.character(df$state))

# Second table, read from FFFFFILED.csv (presumably the imputed/"filled"
# counterpart of `df` -- TODO confirm against the preprocessing step).
df_filled <- read.csv("./data/FFFFFILED.csv")
df_filled$state <- as.factor(as.character(df_filled$state))

# Strongly prefer fixed over scientific notation when printing numbers.
options(scipen = 999)
# Back-transform every "...PerPop" column of df_filled with exp(x) - 1 and
# round to two decimals. exp(x) - 1 is the inverse of log1p, so the columns
# are presumably stored on a log(x + 1) scale -- TODO confirm.
per_pop <- df_filled %>% select(ends_with("PerPop"))
# expm1() computes exp(x) - 1 without losing precision for x close to 0.
per_pop <- round(expm1(per_pop), 2)
for (col in colnames(per_pop)) {
  df_filled[[col]] <- per_pop[[col]]
}

# Add the quantile bins up front so they are available for the
# visualisations.
# Quantile bins for `df` ----
# Each crime rate gets a companion "<rate>Q" column: an ordered factor with
# four levels obtained by cutting the rate at its quartiles.
quantile_labels <- c("very low", "low", "high", "very high")

# Murder rate: the default quartile breaks cannot be used as-is (original
# note: "murder cut point error"), so the second break is forced to 0.1.
murder_breaks <- unname(quantile(df$murdPerPop, na.rm = TRUE))
murder_breaks[2] <- 0.1
df$murdPerPopQ <- cut(
  df$murdPerPop,
  breaks = murder_breaks,
  labels = quantile_labels,
  include.lowest = TRUE,
  ordered_result = TRUE
)

# Remaining rates (rape, robbery, assault, burglary, larceny, auto theft,
# arson) are cut at their own quartiles, ignoring missing values.
for (rate_col in c("rapesPerPop", "robbbPerPop", "assaultPerPop",
                   "burglPerPop", "larcPerPop", "autoTheftPerPop",
                   "arsonsPerPop")) {
  rate_breaks <- unname(quantile(df[[rate_col]], na.rm = TRUE))
  df[[paste0(rate_col, "Q")]] <- cut(
    df[[rate_col]],
    breaks = rate_breaks,
    labels = quantile_labels,
    include.lowest = TRUE,
    ordered_result = TRUE
  )
}

# Quartile breaks of the filled murder rate, stored in `temp`.
temp <- unname(quantile(df_filled$murdPerPop))
# Quantile bins for `df_filled` ----
# Same binning as for `df`: every crime rate gets a "<rate>Q" ordered factor
# with four levels, cut at the quartiles of the filled data.
quantile_labels <- c("very low", "low", "high", "very high")

# Murder rate: second break forced to 0.1 instead of the computed quartile.
filled_murder_breaks <- unname(quantile(df_filled$murdPerPop, na.rm = TRUE))
filled_murder_breaks[2] <- 0.1
df_filled$murdPerPopQ <- cut(
  df_filled$murdPerPop,
  breaks = filled_murder_breaks,
  labels = quantile_labels,
  include.lowest = TRUE,
  ordered_result = TRUE
)

for (rate_col in c("rapesPerPop", "robbbPerPop", "assaultPerPop",
                   "burglPerPop", "larcPerPop", "autoTheftPerPop",
                   "arsonsPerPop")) {
  rate_breaks <- unname(quantile(df_filled[[rate_col]], na.rm = TRUE))
  df_filled[[paste0(rate_col, "Q")]] <- cut(
    df_filled[[rate_col]],
    breaks = rate_breaks,
    labels = quantile_labels,
    include.lowest = TRUE,
    ordered_result = TRUE
  )
}

# Full state names ----
# Replace the two-letter state codes used as factor levels by full names.
# The mapping is keyed by abbreviation, so it does not depend on how many
# states are present or on the order of the levels.
state_lookup <- setNames(
  c(datasets::state.name, "District of Columbia"),
  c(datasets::state.abb, "DC")
)
for (state_levels in list(levels(df$state), levels(df_filled$state))) {
  unknown_states <- setdiff(state_levels, names(state_lookup))
  if (length(unknown_states) > 0) {
    stop("Unknown state abbreviation(s): ",
         paste(unknown_states, collapse = ", "), call. = FALSE)
  }
}
levels(df$state) <- unname(state_lookup[levels(df$state)])
levels(df_filled$state) <- unname(state_lookup[levels(df_filled$state)])

# State-level summary ----
# One row per state: total population and, per crime type, the summed count
# divided by the summed population.
# NOTE(review): the numerators skip NA counts while the denominator sums the
# population of every community, so states with missing counts get lower
# rates -- confirm this is intended.
# NOTE(review): the violent / non-violent counts are rebuilt as
# rate * population, which assumes those rates are per capita -- confirm.
geoinfo <- df %>%
  mutate(
    ViolentCrimes = ViolentCrimesPerPop * population,
    nonViolentCrimes = nonViolPerPop * population
  ) %>%
  group_by(state) %>%
  summarize(
    pop = sum(population),
    murdPerPop = sum(murders, na.rm = TRUE) / pop,
    rapesPerPop = sum(rapes, na.rm = TRUE) / pop,
    robbbPerPop = sum(robberies, na.rm = TRUE) / pop,
    assaultPerPop = sum(assaults, na.rm = TRUE) / pop,
    burglariesPerPop = sum(burglaries, na.rm = TRUE) / pop,
    larceniesPerPop = sum(larcenies, na.rm = TRUE) / pop,
    autoTheftPerPop = sum(autoTheft, na.rm = TRUE) / pop,
    arsonsPerPop = sum(arsons, na.rm = TRUE) / pop,
    ViolentCrimesPerPop = sum(ViolentCrimes, na.rm = TRUE) / pop,
    nonViolPerPop = sum(nonViolentCrimes, na.rm = TRUE) / pop
  )

# State boundaries, read from the GADM level-1 file.
us_sf <- read_rds("data/shapefile/gadm36_USA_1_sf.rds")
# Drop the states that are not drawn and expose the state name as `state`.
# Alaska is "too far" and Montana has "no crime data" (original notes);
# Hawaii and Nebraska are presumably excluded for the same two reasons.
excluded_states <- c("Alaska", "Hawaii", "Montana", "Nebraska")
us_sf <- us_sf %>%
  # `!=` drops rows with a missing name, so keep that behaviour explicit.
  filter(!is.na(NAME_1), !NAME_1 %in% excluded_states) %>%
  rename(state = NAME_1)

# Columns: murdPerPop, rapesPerPop, robbbPerPop, assaultPerPop,
# burglariesPerPop, larceniesPerPop, autoTheftPerPop, arsonsPerPop,
# communitiyname, state, population, lon, lat
## Warning: package 'crosstalk' was built under R version 3.5.3
## Warning: package 'plotly' was built under R version 3.5.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# shared data

# Reshape one crime table to long format: one row per community and crime
# type, holding the quantile label (`Quantile`) and the matching rate
# (`value`). The wide "...Pop" rate columns are dropped at the end.
build_crime_long <- function(data) {
  long <- data %>%
    select(ends_with("Pop"), ends_with("PopQ"),
           communitiyname, state, population, lon, lat) %>%
    select(-ViolentCrimesPerPop, -nonViolPerPop) %>%
    gather("crime", "Quantile", ends_with("PopQ"))

  # `crime` holds names such as "murdPerPopQ"; dropping the last character
  # gives the name of the rate column to read for that row.
  rate_cols <- str_sub(long$crime, 1, -2)
  rate_matrix <- as.matrix(long[unique(rate_cols)])
  # Pick, for every row, the cell in the column named by `rate_cols`.
  long$value <- rate_matrix[cbind(
    seq_len(nrow(long)),
    match(rate_cols, colnames(rate_matrix))
  )]

  long %>% select(-ends_with("Pop"))
}

# filled data has no pop
# The copy is positional, so both tables must have the same number of rows
# (and, presumably, the same row order -- TODO confirm).
stopifnot(nrow(df_filled) == nrow(df))
df_filled$population <- df$population

df_general <- build_crime_long(df)
df_general_filled <- build_crime_long(df_filled)

df_sd <- SharedData$new(df_general)
# crosstalk wrapper around the long-format filled table.
df_sd_filled <- SharedData$new(data = df_general_filled)

# control
# Single-choice crime selector bound to `df_sd` only; the label reads
# "Crime type (before filling)".
filter_select(
  id = "select",
  label = "범죄 종류 (채우기 전)",
  sharedData = df_sd,
  group = ~crime,
  multiple = FALSE
)
# crosstalk remove other layers'
# Scatter of communities at (lon, lat), coloured by quantile, with a tooltip
# text listing state, city, crime type and rate. Axes and legend are hidden.
#
# shared_data: the data passed to ggplot(); must provide lon, lat, Quantile,
#   state, communitiyname, crime and value.
# Returns the ggplot object.
build_crime_map <- function(shared_data) {
  ggplot(data = shared_data) +
    geom_point(
      # `text` is not a ggplot2 aesthetic; it is read later by
      # ggplotly(tooltip = "text").
      aes(lon, lat, color = Quantile,
          text = paste('state :', state, "\n",
                       'city:', communitiyname, "\n",
                       'crime:', crime, "\n",
                       'PerPop:', value, "\n")),
      alpha = 0.3
    ) +
    scale_colour_hue(h = c(90, 0), na.value = "blue") +
    xlim(c(-126.05, -66.85)) +
    ylim(c(20, 53)) +
    theme_void() +
    theme(legend.position = "none")
}

g1 <- build_crime_map(df_sd)
g2 <- build_crime_map(df_sd_filled)

library("RCurl")
# Embed the map picture as a base64 data URI and put it behind both plots.
image_file <- "USMAP.png"
if (!file.exists(image_file)) {
  stop("Background image not found: ", image_file, call. = FALSE)
}
txt <- RCurl::base64Encode(
  readBin(image_file, "raw", n = file.size(image_file)),
  "txt"
)

# Image placement in data coordinates: top-left corner at (-128, 52),
# 63 units wide and 32 units high, stretched to fit, drawn below the points.
map_image <- list(
  source = paste0("data:image/png;base64,", txt),
  xref = "x",
  yref = "y",
  x = -128,
  y = 52,
  sizex = 63,
  sizey = 32,
  sizing = "stretch",
  #opacity = 0.4,
  layer = "below"
)

gg1 <- ggplotly(g1, tooltip = "text") %>% layout(images = map_image)
gg2 <- ggplotly(g2, tooltip = "text") %>% layout(images = map_image)

# Show both widgets side by side.
bscols(gg1, gg2)